load("XSTSF_production.RData")
source('functions.R')
f0_all_ct <- f0_all_pre %>% filter(focus_condition == 'ct' ) %>%
group_by(speaker) %>%
mutate(norm_f0 = scale(log(f0))) %>%
ungroup()
f0_di_ct_lcmh <- f0_all_ct %>%
filter(syntax %in% c('L', 'M') & diortri == 'di') %>%
mutate(sandhi_tone = case_when(sandhi_tone == 'HLLM' ~ 'HMML',
sandhi_tone == 'LLHL' ~ 'LLRF',
.default = sandhi_tone)) %>%
filter(!ind_no %in% c('S2_1_ct', 'S2_11_ct', 'S2_27_ct', 'S3_5_ct', 'S3_19_ct', 'S5_27_ct',
'S2_44_ct', 'S3_37_ct', 'S3_44_ct', 'S6_16_ct', 'S6_31_ct', 'S6_39_ct', 'S7_33_ct')) %>%
filter(is.na(sandhi_tone) == FALSE)
f0_di_ct_lcmh_h <- f0_di_ct_lcmh %>% filter( grepl('^H', sync_tone1))
f0_di_ct_lcmh_l <- f0_di_ct_lcmh %>%
filter( grepl('^[LR]', sync_tone1))
# mutate(sandhi_tone = ifelse(sandhi_tone == 'LLHH' & hist_tone1 == 'yangping',
#'LLLM', sandhi_tone))
f0_di_ct_lc_h <- f0_di_ct_lcmh_h %>% filter(syntax == 'L')
f0_di_ct_mh_h <- f0_di_ct_lcmh_h %>% filter(syntax == 'M')
f0_di_ct_lc_l <- f0_di_ct_lcmh_l %>% filter(syntax == 'L')
f0_di_ct_mh_l <- f0_di_ct_lcmh_l %>% filter(syntax == 'M')
f0_di_ct_lcmh_hp <- f0_di_ct_lcmh_h %>% filter(hist_tone1 == 'yinping')
f0_di_ct_lcmh_hs <- f0_di_ct_lcmh_h %>% filter(hist_tone1 == 'yinshang')
f0_di_ct_lcmh_lp <- f0_di_ct_lcmh_l %>% filter(hist_tone1 == 'yangping')
f0_di_ct_lcmh_ls <- f0_di_ct_lcmh_l %>% filter(hist_tone1 == 'yangshang')
# yinping-initial LC & MH
ggplotly(draw_by(f0_di_ct_lcmh_hp, 'speaker'), tooltip = c('text', 'x'))
# yinshang-initial LC & MH
ggplotly(draw_by(f0_di_ct_lcmh_hs, 'speaker'), tooltip = c('text', 'x'))
# yangping-initial LC & MH
ggplotly(draw_by(f0_di_ct_lcmh_lp, 'speaker'), tooltip = c('text', 'x'))
# yangshang-initial LC & MH
ggplotly(draw_by(f0_di_ct_lcmh_ls, 'speaker'), tooltip = c('text', 'x'))
unique(f0_di_ct_lcmh_l$sandhi_tone) # check the labels
## [1] "LLHH" "LLRF" "LMML" "LLLM"
p_cluster(f0_di_ct_lcmh_l, sandhi_tone)
p_cluster(f0_di_ct_lcmh_lp, sandhi_tone)
p_cluster(f0_di_ct_lcmh_ls, sandhi_tone)
# monosyllabic tone (initial tone)
distri_count(f0_di_ct_lcmh_l, speaker, sync_tone1)
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
distri_count(f0_di_ct_lcmh_l, hist_tone1, sync_tone1)
# by second tone
distri_count(f0_di_ct_lcmh_l, sync_tone2, sandhi_tone)
distri_count(f0_di_ct_lcmh_l, sync_tone2, sandhi_tone, hist_tone1)
distri_count(f0_di_ct_lc_l, sync_tone2, sandhi_tone)+ggtitle('Lexical compounds')
distri_count(f0_di_ct_mh_l, sync_tone2, sandhi_tone)+ggtitle('Adjective-Noun phrases')
distri_count(f0_di_ct_lcmh_l, hist_tone2, sandhi_tone)
# by first tone
distri_count(f0_di_ct_lcmh_l, sync_tone1, sandhi_tone)
distri_count(f0_di_ct_lcmh_l, hist_tone1, sandhi_tone)
distri_count(f0_di_ct_lcmh_l, hist_tone1, sandhi_tone, syntax)
# by speaker
distri_count(f0_di_ct_lcmh_l, speaker, sandhi_tone, hist_tone1)
# by item
distri_count(f0_di_ct_lcmh_lp, citation_no, sandhi_tone)
distri_count(f0_di_ct_lcmh_ls, citation_no, sandhi_tone)
distri_prop(f0_di_ct_lcmh_l, hist_tone1, sync_tone1)
distri_prop(f0_di_ct_lcmh_l, sync_tone2, hist_tone2)
distri_prop(f0_di_ct_lcmh_l, syntax, sandhi_tone)
distri_prop(f0_di_ct_lcmh_l, hist_tone1, sandhi_tone)
distri_prop(f0_di_ct_lcmh_l, sync_tone2, sandhi_tone, hist_tone1)
distri_prop(f0_di_ct_lcmh_l, hist_tone2, sandhi_tone, hist_tone1)
distri_prop(f0_di_ct_lcmh_l, speaker, sandhi_tone, hist_tone1)
distri_prop(f0_di_ct_lcmh_ls, speaker, sandhi_tone, sync_tone2)
distri_prop(f0_di_ct_lcmh_l, syntax, sandhi_tone, hist_tone1)
# Gradience
f0_di_ct_lcmh_l_llhh <- filter(f0_di_ct_lcmh_l, sandhi_tone %in% c('LLHH', 'LLLM')) %>%
mutate(sandhi_tone = paste0(sandhi_tone, '_', hist_tone1))
p_sub_cluster(f0_di_ct_lcmh_l_llhh, sandhi_tone)
## `summarise()` has grouped output by 'sandhi_tone', 'syllable_no'. You can
## override using the `.groups` argument.
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# second tone [synchronic] & syntax
p_sub_cluster(f0_di_ct_lcmh_l, sync_tone2, sandhi_tone)
# LLLM
f0_di_ct_lp_lllm <- f0_di_ct_lcmh_lp %>% filter(sandhi_tone == 'LLLM')
p_cluster(f0_di_ct_lp_lllm, sync_tone2, 'sandhi_tone')
p_sub_cluster(f0_di_ct_lp_lllm, sync_tone2, sandhi_tone)
# LLLM: LC
f0_di_ct_lc_lp_lllm <- f0_di_ct_lcmh_lp %>% filter(sandhi_tone == 'LLLM' & syntax == 'L')
p_sub_cluster(f0_di_ct_lc_lp_lllm, sync_tone2, sandhi_tone)
# LLLM: AN
f0_di_ct_mh_lp_lllm <- f0_di_ct_lcmh_lp %>% filter(sandhi_tone == 'LLLM' & syntax == 'M')
p_sub_cluster(f0_di_ct_mh_lp_lllm, sync_tone2, sandhi_tone)
# yangping & yangshang
p_sub_cluster(f0_di_ct_lcmh_lp, sync_tone2, sandhi_tone)
p_sub_cluster(f0_di_ct_lp_lllm, sync_tone2)
p_sub_cluster(f0_di_ct_lp_lllm, sync_tone2, syntax)
p_sub_cluster(f0_di_ct_lcmh_ls, sync_tone2, sandhi_tone)
f0_di_ct_lp_lllm %>% filter(sync_tone2 == 'HL')
## # A tibble: 20 × 20
## speaker token diortri focus_condition focus_no citation_tone sandhi_tone
## <fct> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 S4 皮袄 di ct NA RF_HL LLLM
## 2 S4 皮袄 di ct NA RF_HL LLLM
## 3 S4 皮袄 di ct NA RF_HL LLLM
## 4 S4 皮袄 di ct NA RF_HL LLLM
## 5 S4 皮袄 di ct NA RF_HL LLLM
## 6 S4 皮袄 di ct NA RF_HL LLLM
## 7 S4 皮袄 di ct NA RF_HL LLLM
## 8 S4 皮袄 di ct NA RF_HL LLLM
## 9 S4 皮袄 di ct NA RF_HL LLLM
## 10 S4 皮袄 di ct NA RF_HL LLLM
## 11 S4 皮袄 di ct NA RF_HL LLLM
## 12 S4 皮袄 di ct NA RF_HL LLLM
## 13 S4 皮袄 di ct NA RF_HL LLLM
## 14 S4 皮袄 di ct NA RF_HL LLLM
## 15 S4 皮袄 di ct NA RF_HL LLLM
## 16 S4 皮袄 di ct NA RF_HL LLLM
## 17 S4 皮袄 di ct NA RF_HL LLLM
## 18 S4 皮袄 di ct NA RF_HL LLLM
## 19 S4 皮袄 di ct NA RF_HL LLLM
## 20 S4 皮袄 di ct NA RF_HL LLLM
## # ℹ 13 more variables: syllable_no <dbl>, citation_no <fct>, ind_no <chr>,
## # sync_tone1 <chr>, sync_tone2 <chr>, sync_tone3 <chr>, hist_tone1 <chr>,
## # hist_tone2 <chr>, hist_tone3 <chr>, syntax <chr>, time <fct>, f0 <dbl>,
## # norm_f0 <dbl[,1]>
# data preparation
f0_di_ct_lcmh_l_kmeans <- f0_di_ct_lcmh_l %>%
select(-diortri, -syllable_no, -focus_no, -f0) %>%
spread(time, norm_f0)
# k-means clustering
cluster_model <- k_means_clustering(f0_di_ct_lcmh_l_kmeans)
kml(cluster_model, nbClusters = 2:10)
## ~ Fast KmL ~
## ***************************************************************************************************S
## 100 ********************************************************************************S
kml::plot(cluster_model, 3, parTraj=parTRAJ(col="clusters"))
kml::plot(cluster_model, 4, parTraj=parTRAJ(col="clusters"))
plotAllCriterion(cluster_model)
# get cluster results
f0_di_ct_lcmh_l_kmeans <- f0_di_ct_lcmh_l_kmeans %>%
mutate(cluster4 = getClusters(cluster_model, 4),
cluster3 = getClusters(cluster_model, 3),
sub_cluster = paste0(sandhi_tone, '_', cluster4)) %>%
mutate(sandhi_tone = ifelse(cluster4 == 'C' & sandhi_tone == 'LLLM', 'LLRF', sandhi_tone))
# heatmap distribution
cluster_solution <- wide_to_long(f0_di_ct_lcmh_l_kmeans)
heatmap_df <- heatmap_data(cluster_solution, cluster3)
compare_cluster(heatmap_df, 'cluster3')
cluster_solution <- wide_to_long(f0_di_ct_lcmh_l_kmeans)
heatmap_df <- heatmap_data(cluster_solution, cluster4)
compare_cluster(heatmap_df, 'cluster4')
cluster_solution %>% filter(cluster4 == 'A' & sandhi_tone == 'LLLM' & time == 1)
## # A tibble: 15 × 20
## speaker token focus_condition citation_tone sandhi_tone citation_no ind_no
## <fct> <chr> <chr> <chr> <chr> <fct> <chr>
## 1 S1 黄花 ct RF_HH LLLM 48 S1_48_ct
## 2 S2 杨梅 ct RF_RF LLLM 35 S2_35_ct
## 3 S2 黄树 ct RF_LH LLLM 52 S2_52_ct
## 4 S3 杨梅 ct RF_LH LLLM 35 S3_35_ct
## 5 S5 杨梅 ct RF_RF LLLM 35 S5_35_ct
## 6 S5 皮鞋 ct RF_RF LLLM 39 S5_39_ct
## 7 S5 黄树 ct RF_LH LLLM 52 S5_52_ct
## 8 S5 黄绳 ct RF_RF LLLM 28 S5_28_ct
## 9 S5 黄豆 ct RF_LH LLLM 41 S5_41_ct
## 10 S7 黄树 ct RF_LH LLLM 52 S7_52_ct
## 11 S7 黄绳 ct RF_RF LLLM 28 S7_28_ct
## 12 S7 黄豆 ct RF_LH LLLM 41 S7_41_ct
## 13 S8 杨梅 ct RF_RF LLLM 35 S8_35_ct
## 14 S8 黄绳 ct RF_RF LLLM 28 S8_28_ct
## 15 S8 黄豆 ct RF_LH LLLM 41 S8_41_ct
## # ℹ 13 more variables: sync_tone1 <chr>, sync_tone2 <chr>, sync_tone3 <chr>,
## # hist_tone1 <chr>, hist_tone2 <chr>, hist_tone3 <chr>, syntax <chr>,
## # cluster4 <fct>, cluster3 <fct>, sub_cluster <chr>, time <int>,
## # norm_f0 <dbl>, syllable_no <chr>
Examine mismatches
cluster_lllm <- cluster_solution %>% filter(sandhi_tone == 'LLLM')
p_cluster(cluster_lllm, sub_cluster)
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.